true
# setwd("/Volumes/home/greally-lab/Claudia_Andrew/CRISPR_Proj_combined")

# load in libraries 
library(DESeq2)
library(EDASeq)
library(matrixStats)
library(RUVSeq)
library(qvalue)
library(genefilter)
library(RColorBrewer)
library(pheatmap)
library(UpSetR)
library(RFmarkerDetector)
library(ggplot2)
library(ggthemes)

# set options
options(scipen=999, stringsAsFactors = FALSE)

The goal of this study was to 1) assess the robust activation of CD34 upon transient transfection of HEK 293T cells with a CRISPR-deadCas9-VP128 and 2) to assess the potential off-target effects of this method. We perform a simple differential expression analysis among our three treatments_r1: control, tranfection with dCas9-VP128 w/o gRNAs, and transfection with dCas9-VP128 and gRNAs targetted to the promoter of CD34. We perform analysis of each replicate separately and then combined.

The RNA-seq libraries were prepared similarly with ERCC spike-ins. However, the spike-ins could not be used to normalize the libraries due to their low count number; this most likely is due to the spike-ins not being added at a high enough concentration. Another important caveat to the libraries is that in the first replicate the control library was generated from cells passed through the FACS machine, whereas the in second replicate, the control cells were not passed through the FACS machine.

First we read in the two biological replicates (and their two technical reps for each treatment).

# reading in and merging the counts

## selecting the files ot read in
files <- grep(pattern = "ReadsPerGene.out.tab", x = list.files(), value = TRUE)

files
##  [1] "1A.1ReadsPerGene.out.tab"       "1minus.1ReadsPerGene.out.tab"  
##  [3] "2Aplus.1ReadsPerGene.out.tab"   "2plus.1ReadsPerGene.out.tab"   
##  [5] "3AQ2.1ReadsPerGene.out.tab"     "3plus.1ReadsPerGene.out.tab"   
##  [7] "CD34_1.1ReadsPerGene.out.tab"   "CD34_2.1ReadsPerGene.out.tab"  
##  [9] "CRISPR_1.1ReadsPerGene.out.tab" "CRISPR_2.1ReadsPerGene.out.tab"
## [11] "Ctrl_1.1ReadsPerGene.out.tab"   "Ctrl_2.1ReadsPerGene.out.tab"
list_counts <- list()
for (i in 1:length(files)){
  list_counts[[i]] <- read.table(paste(files[i]))
  if (i < 2) {
    df_counts <- list_counts[[1]][,c(1,4)]
  }
  else {
    df_counts <- merge(df_counts, list_counts[[i]][,c(1,4)], by = "V1")
  }
}
dim(df_counts) #  60700    13
## [1] 60700    13
## remove the ambiguous, multimapp, no feature, and unmapped read totals
df_counts <- df_counts[-c(60697:60700),]
rownames(df_counts) <- df_counts[,1]
df_counts <- df_counts[,-1]

colnames(df_counts) <- c("Ctrl_1_1", "Ctrl_1_2", "CRISPR_1_1", "CRISPR_1_2", "CD34_1_1",
                         "CD34_1_2", "CD34_2_1", "CD34_2_2", "CRISPR_2_1","CRISPR_2_2",
                         "Ctrl_2_1", "Ctrl_2_2")
head(df_counts)
##                 Ctrl_1_1 Ctrl_1_2 CRISPR_1_1 CRISPR_1_2 CD34_1_1 CD34_1_2
## ENSG00000000003     1915     2792       1970       1439     1937     2008
## ENSG00000000005        0        3          0          1        0        0
## ENSG00000000419      601      770       1036        693      873      924
## ENSG00000000457      342      389        397        234      302      352
## ENSG00000000460      637      867        786        504      670      663
## ENSG00000000938        1        0          2          0        1        1
##                 CD34_2_1 CD34_2_2 CRISPR_2_1 CRISPR_2_2 Ctrl_2_1 Ctrl_2_2
## ENSG00000000003     2104     1654       1935       2562     2205     2708
## ENSG00000000005        0        0          1          0        0        1
## ENSG00000000419     1037      774        982       1227      696      762
## ENSG00000000457      341      278        311        453      288      396
## ENSG00000000460      796      571        739        896      683      803
## ENSG00000000938        1        0          7          0        0        0

Next we filter the RNAs to be analzyed. First, we apply a simple filter for only those RNAs that are expressed at high levels. The RNA must have at least 5 counts in four of the samples, thus allowing only genes expressed by only one treatment group to be retained. Next, we filter for protein coding genes only or protein coding and long non-coding RNAs.

# expression filter
idx_filt_exp_com <- apply(df_counts, 1, function(x) length(x[x>5])>=4) 
head(idx_filt_exp_com)
## ENSG00000000003 ENSG00000000005 ENSG00000000419 ENSG00000000457 
##            TRUE           FALSE            TRUE            TRUE 
## ENSG00000000460 ENSG00000000938 
##            TRUE           FALSE
filtered_com <- df_counts[idx_filt_exp_com,]
dim(filtered_com) # 19,244     12
## [1] 19244    12
# remove spike ins
spikes_com <- grep("ERCC", rownames(filtered_com))
length(spikes_com) # 12
## [1] 12
filterd_noSpike_com <- filtered_com[-spikes_com,]
dim(filterd_noSpike_com) # 19232 12
## [1] 19232    12
# filter for only protein coding RNAs
prot_ensg_ID <- read.table("../../indexes/Hg38_rel79_ERCC/prot_ENSG.txt")
dim(prot_ensg_ID) # 22002     1
## [1] 22002     1
filterd_noSpike_pc_com <- filterd_noSpike_com[
  rownames(filterd_noSpike_com) %in% prot_ensg_ID$V1,]
dim(filterd_noSpike_pc_com) # 14260    6
## [1] 14260    12

Let’s look at the PCA and RLE plots.

# resorting the columns so that controls, CRISPRs, and CD34s are next to each other
filterd_noSpike_pc_com <- filterd_noSpike_pc_com[,c(1,2,11,12,3,4,9,10,5,6,7,8)]

# set a factor for different treatments
treatments_com <- as.factor(rep(c("Ctrl","CRISPR","CD34"), each=4)) 
treatments_com <- relevel(treatments_com, c("Ctrl"))
replicates_com <- as.factor(rep(c("Rep1", "Rep2", "Rep1", "Rep2", "Rep1", "Rep2"),each=2))

# create expression sets 
eset_pc_com <- newSeqExpressionSet(as.matrix(filterd_noSpike_pc_com),
                                  phenoData = data.frame(treatments_com, 
                                                   row.names=colnames(filterd_noSpike_pc_com)))

# choose a color set
colors_com <- brewer.pal(6, "Dark2")
colors <- brewer.pal(3, "Dark2")


# Make RLE plots
plotRLE(eset_pc_com, outline=FALSE, ylim=c(-4, 4), col=colors[treatments_com],
        main="Protein coding RNAs before normalization") 

limma::plotMDS(counts(eset_pc_com), dim=c(2,3))

# Make PCA plots
plotPCA(eset_pc_com, col=colors[treatments_com], cex=1.2, 
        main = "Protein coding RNAs before normalization")

plotPCA(eset_pc_com, k=3, col=colors[treatments_com], cex=1.2, 
        main="Protein coding RNAs before normalization") 

plotPCA(eset_pc_com, k=3, col=colors[replicates_com], cex=1.2, 
        main="Protein coding RNAs before normalization") 

The different treatment groups cluster together as expected except for CRISPR_1_1. Also PC3 seems to be driven by replicate (1v2).

Next we normalize based on housekeeping gene expression. House keeping genes were identified by a previous study “Human housekeeping genes revisited” E. Eisenberg and E.Y. Levanon, Trends in Genetics, 29 (2013) and a list is avaialble for download at (https://www.tau.ac.il/~elieis/HKG/). We took only the bottom quartile variance house keeping genes to use for normalization.

# read in house keeping genes 
HK_genes <- read.table("HK_ensembl_ID.txt")
dim(HK_genes) # 4202    1
## [1] 4202    1
# grab the HK genes from RNAs being analyzed 
HK_pc_com <- filterd_noSpike_pc_com[which(rownames(filterd_noSpike_pc_com) %in% HK_genes[,1]),]
dim(HK_pc_com) # 3753    12
## [1] 3753   12
# examine the variance of the HK genes and take only the bottom 1000 genes to normalize with
## for protein coding RNAs only
HK_pc_com_rsd <- apply(as.matrix(HK_pc_com), 1, rsd)
boxplot(HK_pc_com_rsd)

summary(HK_pc_com_rsd)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   12.62   18.24   20.33   21.56   23.32   83.66
HK_pc_lowRSD <- sort(HK_pc_com_rsd)[1:1000]

# Normalize using the house keeping genes
eset_pc_norm_com <- RUVg(eset_pc_com, names(HK_pc_lowRSD), k=1) 

# The weights have been added to the phenotype data
pData(eset_pc_norm_com)
##            treatments_com         W_1
## Ctrl_1_1             Ctrl -0.23181281
## Ctrl_1_2             Ctrl  0.30140064
## Ctrl_2_1             Ctrl -0.15068670
## Ctrl_2_2             Ctrl  0.19543575
## CRISPR_1_1         CRISPR  0.23101365
## CRISPR_1_2         CRISPR -0.50824987
## CRISPR_2_1         CRISPR -0.00221291
## CRISPR_2_2         CRISPR  0.51117416
## CD34_1_1             CD34 -0.00075488
## CD34_1_2             CD34  0.07351361
## CD34_2_1             CD34  0.04398612
## CD34_2_2             CD34 -0.46280677
# Make RLE plots
plotRLE(eset_pc_norm_com, outline=FALSE, ylim=c(-4, 4), col=colors[treatments_com],
        main="Protein coding RNAs after normalization") 

# Make PCA plots
plotPCA(eset_pc_norm_com, col=colors[treatments_com], cex=1.2, 
        main = "Protein coding RNAs after normalization")

plotPCA(eset_pc_norm_com, k=3, col=colors[treatments_com], cex=1.2, 
        main="Protein coding RNAs after normalization") 

Now PC1 captures the differences between the transfected and non-transfected treatments. PC2 is now based upon replicate. PC3 captures the differences among the treatments perfectly. We will continue with this since the replicate will be included in the DEseq model.

Next, we perform the differential expression among the different treatments

#adding the replicates information to the pData
pData(eset_pc_norm_com) <- cbind(pData(eset_pc_norm_com), replicates_com)

# convert the expression set to a DESeq object
dds_pc_com <- DESeqDataSetFromMatrix(countData = counts(eset_pc_norm_com), 
                                    colData = pData(eset_pc_norm_com), 
                                    design = ~W_1 + replicates_com + treatments_com)

# Run DESeq Wald tests
dds_pc_com <- DESeq(dds_pc_com)

# generate results among the different treatments_com and set a log fold change threshold of 1
ruv_res_con_CD34_pc_com <- results(dds_pc_com, lfcThreshold=1, altHypothesis="greaterAbs", 
                       contrast = c("treatments_com", "CD34", "Ctrl"), alpha=0.05)

ruv_res_con_crisp_pc_com <- results(dds_pc_com, lfcThreshold=1, altHypothesis="greaterAbs",
                             contrast = c("treatments_com", "CRISPR", "Ctrl"), alpha=0.05)

ruv_res_crisp_CD34_pc_com <- results(dds_pc_com, lfcThreshold=1, altHypothesis="greaterAbs", 
                                 contrast = c("treatments_com", "CD34", "CRISPR"), alpha=0.05)

# draw MA plots of results 
## draw horizontal lines for log fold change threshold
drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2)
ylim<-c(-8,8)

##draw the MA plots
DESeq2::plotMA(ruv_res_con_CD34_pc_com, 
               main="RUV Ctrl vs CD34 PC-com", ylim=ylim); drawLines()

DESeq2::plotMA(ruv_res_con_crisp_pc_com, 
               main="RUV Ctrl vs CRISPR PC-com", ylim=ylim); drawLines()

DESeq2::plotMA(ruv_res_crisp_CD34_pc_com,
               main="RUV CRISPR vs CD34 PC-com", ylim=ylim); drawLines()

# ENSG00000174059 is CD34 ENSG ID 
ruv_res_con_CD34_pc_com["ENSG00000174059",]
## log2 fold change (MLE): treatments_com CD34 vs Ctrl 
## Wald test p-value: treatments com CD34 vs Ctrl 
## DataFrame with 1 row and 6 columns
##                  baseMean log2FoldChange     lfcSE      stat
##                 <numeric>      <numeric> <numeric> <numeric>
## ENSG00000174059  310.1427        5.60014 0.1811934  25.38801
##                                                                                                                                                                 pvalue
##                                                                                                                                                              <numeric>
## ENSG00000174059 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003420907
##                                                                                                                                                               padj
##                                                                                                                                                          <numeric>
## ENSG00000174059 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004878214
ruv_res_con_crisp_pc_com["ENSG00000174059",]
## log2 fold change (MLE): treatments_com CRISPR vs Ctrl 
## Wald test p-value: treatments com CRISPR vs Ctrl 
## DataFrame with 1 row and 6 columns
##                  baseMean log2FoldChange     lfcSE      stat    pvalue
##                 <numeric>      <numeric> <numeric> <numeric> <numeric>
## ENSG00000174059  310.1427      -1.236545 0.3046578  -0.77643 0.4374951
##                      padj
##                 <numeric>
## ENSG00000174059         1
ruv_res_crisp_CD34_pc_com["ENSG00000174059",]
## log2 fold change (MLE): treatments_com CD34 vs CRISPR 
## Wald test p-value: treatments_com CD34 vs CRISPR 
## DataFrame with 1 row and 6 columns
##                  baseMean log2FoldChange     lfcSE      stat
##                 <numeric>      <numeric> <numeric> <numeric>
## ENSG00000174059  310.1427       6.836686 0.2658514  21.95469
##                                                                                                                              pvalue
##                                                                                                                           <numeric>
## ENSG00000174059 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007810719
##                                                                                                                           padj
##                                                                                                                      <numeric>
## ENSG00000174059 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001113808
# looking at the 
summary(ruv_res_con_CD34_pc_com) # 116 up and 36 down
## 
## out of 14260 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)     : 123, 0.86% 
## LFC < 0 (down)   : 38, 0.27% 
## outliers [1]     : 0, 0% 
## low counts [2]   : 0, 0% 
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(ruv_res_con_crisp_pc_com) # 104 up and 22 down
## 
## out of 14260 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)     : 103, 0.72% 
## LFC < 0 (down)   : 22, 0.15% 
## outliers [1]     : 0, 0% 
## low counts [2]   : 0, 0% 
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(ruv_res_crisp_CD34_pc_com) # only CD34 up!
## 
## out of 14260 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up)     : 1, 0.007% 
## LFC < 0 (down)   : 0, 0% 
## outliers [1]     : 0, 0% 
## low counts [2]   : 0, 0% 
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# grabbing the ENSG IDs from the differentially expressed genes 
ruv_res_con_CD34_pc_com_nona <- ruv_res_con_CD34_pc_com[!is.na(ruv_res_con_CD34_pc_com$padj),]
ruv_res_con_CD34_pc_com_IDs <-
  rownames(ruv_res_con_CD34_pc_com_nona)[ruv_res_con_CD34_pc_com_nona$padj<0.05]
length(ruv_res_con_CD34_pc_com_IDs) #161
## [1] 161
num_CD34Vctrl <- length(ruv_res_con_CD34_pc_com_IDs) 

ruv_res_con_crisp_pc_com_IDs <-
  rownames(ruv_res_con_crisp_pc_com)[ruv_res_con_crisp_pc_com$padj<0.05]
length(ruv_res_con_crisp_pc_com_IDs) #125
## [1] 125
num_crispVctrl <- length(ruv_res_con_crisp_pc_com_IDs) 

sum(ruv_res_con_CD34_pc_com_IDs %in% ruv_res_con_crisp_pc_com_IDs) # 97 are the same 
## [1] 97
num_overlaps <- sum(ruv_res_con_CD34_pc_com_IDs %in% ruv_res_con_crisp_pc_com_IDs)

ruv_res_pc_com_IDs <- ruv_res_con_CD34_pc_com_IDs[(ruv_res_con_CD34_pc_com_IDs %in% ruv_res_con_crisp_pc_com_IDs)]
ruv_res_con_CD34_pc_com_sig <- as.data.frame(ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% ruv_res_pc_com_IDs,])
ruv_res_con_CD34_pc_com_sig$Ensembl_ID <- rownames(ruv_res_con_CD34_pc_com_sig)

ruv_res_con_crisp_pc_com_sig <- as.data.frame(ruv_res_con_crisp_pc_com[rownames(ruv_res_con_crisp_pc_com) %in% ruv_res_pc_com_IDs,])
ruv_res_con_crisp_pc_com_sig$Ensembl_ID <- rownames(ruv_res_con_crisp_pc_com_sig)

ruv_res_con_C34_CRISPR_pc_com_sig <- merge(x = ruv_res_con_CD34_pc_com_sig,
                                           y = ruv_res_con_crisp_pc_com_sig[,-1],
                                           by = "Ensembl_ID", all=T)
write.table(ruv_res_con_C34_CRISPR_pc_com_sig, "ruv_res_con_C34_CRISPR_pc_com_sig2.txt",
              row.names = F, col.names = TRUE, sep="\t", append=F, quote = F)
write.csv(ruv_res_con_C34_CRISPR_pc_com_sig, "ruv_res_con_C34_CRISPR_pc_com_sig2.txt",
              row.names = F, col.names = TRUE, quote = F)

Let’s make an upset plot with the combined and the individual replicates.

# Setting up the dataframe for the upset plot. 
ruv_res_con_CD34_pc_com_IDs_upsetdf <- data.frame(Gene=ruv_res_con_CD34_pc_com_IDs,
                                                  Com_ctrl_v_CD34=1)
ruv_res_con_crisp_pc_com_IDs_upsetdf <- data.frame(Gene=ruv_res_con_crisp_pc_com_IDs,
                                                  Com_ctrl_v_CRISPR=1)
Combined_upsetdf <- merge(ruv_res_con_CD34_pc_com_IDs_upsetdf, 
                            ruv_res_con_crisp_pc_com_IDs_upsetdf,
                            by="Gene", all=T)
dim(Combined_upsetdf) # 189   3
## [1] 189   3
Combined_upsetdf_nona <- replace(Combined_upsetdf,is.na(Combined_upsetdf),0)

sum(duplicated(Combined_upsetdf_nona$Gene))
## [1] 0
upset(Combined_upsetdf_nona, nsets = 2, number.angles = 0, point.size = 3.5, 
      line.size = 1.5, mainbar.y.label = "DE Gene Intersections", 
      sets.x.label = "# of DE Genes", text.scale = c(1.3, 1, 1, 1, 1, 1), order.by = "freq")

# I would look at the gene ontology enrichment of the combined overlapping 97 genes. 
saveRDS(ruv_res_pc_com_IDs, "ruv_res_pc_com_IDs2.rds")
ruv_res_pc_com_IDs
##  [1] "ENSG00000006128" "ENSG00000006327" "ENSG00000008517"
##  [4] "ENSG00000031698" "ENSG00000046774" "ENSG00000049249"
##  [7] "ENSG00000052749" "ENSG00000070669" "ENSG00000077009"
## [10] "ENSG00000077150" "ENSG00000078237" "ENSG00000087074"
## [13] "ENSG00000087245" "ENSG00000095370" "ENSG00000099860"
## [16] "ENSG00000101255" "ENSG00000104112" "ENSG00000104856"
## [19] "ENSG00000105499" "ENSG00000109089" "ENSG00000111981"
## [22] "ENSG00000115129" "ENSG00000115414" "ENSG00000115461"
## [25] "ENSG00000119922" "ENSG00000120738" "ENSG00000122733"
## [28] "ENSG00000124194" "ENSG00000124253" "ENSG00000128965"
## [31] "ENSG00000130513" "ENSG00000133169" "ENSG00000134986"
## [34] "ENSG00000135423" "ENSG00000136235" "ENSG00000137090"
## [37] "ENSG00000138271" "ENSG00000139292" "ENSG00000139364"
## [40] "ENSG00000141756" "ENSG00000144452" "ENSG00000145147"
## [43] "ENSG00000147257" "ENSG00000148677" "ENSG00000149131"
## [46] "ENSG00000151012" "ENSG00000152137" "ENSG00000153233"
## [49] "ENSG00000153879" "ENSG00000155495" "ENSG00000159307"
## [52] "ENSG00000160712" "ENSG00000161011" "ENSG00000163121"
## [55] "ENSG00000163661" "ENSG00000165323" "ENSG00000165655"
## [58] "ENSG00000166900" "ENSG00000167767" "ENSG00000168003"
## [61] "ENSG00000168394" "ENSG00000168542" "ENSG00000169393"
## [64] "ENSG00000169429" "ENSG00000169896" "ENSG00000171223"
## [67] "ENSG00000171951" "ENSG00000173093" "ENSG00000173276"
## [70] "ENSG00000175197" "ENSG00000175592" "ENSG00000175868"
## [73] "ENSG00000176046" "ENSG00000176406" "ENSG00000176746"
## [76] "ENSG00000179046" "ENSG00000180447" "ENSG00000181026"
## [79] "ENSG00000182916" "ENSG00000183696" "ENSG00000185437"
## [82] "ENSG00000186115" "ENSG00000186340" "ENSG00000186529"
## [85] "ENSG00000187608" "ENSG00000187775" "ENSG00000187955"
## [88] "ENSG00000188211" "ENSG00000196878" "ENSG00000197355"
## [91] "ENSG00000197558" "ENSG00000214212" "ENSG00000223638"
## [94] "ENSG00000229292" "ENSG00000243955" "ENSG00000254415"
## [97] "ENSG00000265972"
any(ruv_res_pc_com_IDs == "ENSG00000116721")
## [1] FALSE

Making a diagram of the overlap between treatment comparisons

library(VennDiagram)
grid.newpage()
draw.pairwise.venn(num_crispVctrl, num_CD34Vctrl, num_overlaps, 
                   category = c("CRISPR v. Ctrl", "CD34 v. Ctrl"), 
                   lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), 
                   cat.dist = rep(0.025, 2))

## (polygon[GRID.polygon.103], polygon[GRID.polygon.104], polygon[GRID.polygon.105], polygon[GRID.polygon.106], text[GRID.text.107], text[GRID.text.108], text[GRID.text.109], text[GRID.text.110], text[GRID.text.111])
pdf("Compare_VD_1.pdf", width=6, height = 4, family="ArialMT")
draw.pairwise.venn(num_crispVctrl, num_CD34Vctrl, num_overlaps, 
                   category = c("CRISPR v. Ctrl", "CD34 v. Ctrl"), 
                   lty = rep("blank", 2), fill = c("#7570B3", "#D95F02"),
                   alpha = rep(0.5, 2), cat.pos = c(0, 0), 
                   cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.112], polygon[GRID.polygon.113], polygon[GRID.polygon.114], polygon[GRID.polygon.115], text[GRID.text.116], text[GRID.text.117], text[GRID.text.118], text[GRID.text.119], text[GRID.text.120])
dev.off()
## quartz_off_screen 
##                 2

Checking to make sure that the chosen qPCR validation targets are within the 97 genes overlapping between the comparisons.

# in order: NFKB2, TRIB3, RelB, EGR1, JUNB, DDIT3, fosl1
qPCR_IDs <- c("ENSG00000077150", "ENSG00000101255", "ENSG00000104856", 
              "ENSG00000120738", "ENSG00000171223", "ENSG00000175197",
              "ENSG00000175592") 
length(qPCR_IDs) # 7
## [1] 7
# check IDs
sum(ruv_res_pc_com_IDs %in% qPCR_IDs) #7
## [1] 7

Tried to manipulate the MAplot function code to make figures.. but it didn’t work. This next code chunk is not evaluated but is kept for scratch work.

t_col <- function(color, percent = 50, name = NULL) {
#     color = color name
#   percent = % transparency
#      name = an optional name for the color
## Get RGB values for named color
  rgb.val <- col2rgb(color)
## Make new color using input color as base and alpha set by transparency
  t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3],
               max = 255,
               alpha = (100-percent)*255/100,
               names = name)
## Save the color
  invisible(t.col)
}
lt_black <- t_col("black", percent = 50, name = "black_t")

 # red for CD34
 # purple for similar?
 # blue for significant?
 # CRISPR sig #7570B3
 # CD34 sig #D95F02

plotMA_AJ <- function( object, ylim = NULL,
  colNonSig = "gray32", colSig = "red3", colLine = "#ff000080",
  log = "x", cex=0.45, xlab="mean expression", ylab="log fold change", ... )
{
   if( !( ncol(object) == 3 & inherits( object[[1]], "numeric" ) & inherits( object[[2]], "numeric" )
         & inherits( object[[3]], "logical" ) ) ) {
      stop( "When called with a data.frame, plotMA expects the data frame to have 3 columns, two numeric ones for mean and log fold change, and a logical one for significance.")
   }
   colnames(object) <- c( "mean", "lfc", "sig" )
   object = subset( object, mean != 0 )
   py = object$lfc
   if( is.null(ylim) )
      ylim = c(-1,1) * quantile(abs(py[is.finite(py)]), probs=0.99) * 1.1
   plot(object$mean, pmax(ylim[1], pmin(ylim[2], py)),
       log=log, pch=ifelse(py<ylim[1], 6, ifelse(py>ylim[2], 2, 16)),
       cex=cex, col=ifelse( object$sig, colSig, colNonSig ), xlab=xlab, ylab=ylab, ylim=ylim, ...)
  abline( h=0, lwd=4, col= lt_black )
}



colNonSig = "gray32"
ylim <- c(-6,6)
 # color non-sig as gray
col_ruv_res_con_CD34_pc_com <- rep(t_col("gray32", percent = 10, name = "lt.gray32"), nrow(ruv_res_con_CD34_pc_com))
head(col_ruv_res_con_CD34_pc_com)
 # color CD34 sig as orange
col_ruv_res_con_CD34_pc_com[which(ruv_res_con_CD34_pc_com$padj < 0.05)] <- "#D95F02"
 # color shared sig as deep purple
col_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% ruv_res_pc_com_IDs] <- "#551a8b"
 # color qPCR targets green
col_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% qPCR_IDs] <- "#006633"
 # color CD34 sig as red
col_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% "ENSG00000174059"] <- "red3"

pch_ruv_res_con_CD34_pc_com <- rep(20, nrow(ruv_res_con_CD34_pc_com))
pch_ruv_res_con_CD34_pc_com[col_ruv_res_con_CD34_pc_com != col_ruv_res_con_CD34_pc_com[1]] <- 19
pch_ruv_res_con_CD34_pc_com[rownames(ruv_res_con_CD34_pc_com) %in% qPCR_IDs] <- 17

plot(ruv_res_con_CD34_pc_com$baseMean, pmax(ylim[1], pmin(ylim[2], ruv_res_con_CD34_pc_com$log2FoldChange)),
       log="x", pch=pch_ruv_res_con_CD34_pc_com,
       cex=0.45, col=col_ruv_res_con_CD34_pc_com, 
     xlab="mean expression",
     ylab="log fold change",
     ylim=ylim)
abline(h=0, lwd=4, col=lt_black)

drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2)

head(ruv_res_con_CD34_pc_com)

DESeq2::plotMA(ruv_res_con_CD34_pc_com, 
               main="RUV Ctrl vs CD34 PC-com", ylim=ylim); drawLines()

Using the base plotting package, I didn’t have enough control over the size of the points (as well as the transparency), so I’ll make the MA plot using the ggplot package.

head(ruv_res_con_CD34_pc_com)
## log2 fold change (MLE): treatments_com CD34 vs Ctrl 
## Wald test p-value: treatments com CD34 vs Ctrl 
## DataFrame with 6 rows and 6 columns
##                    baseMean log2FoldChange      lfcSE      stat    pvalue
##                   <numeric>      <numeric>  <numeric> <numeric> <numeric>
## ENSG00000000003 2063.928940    -0.13312402 0.04538688         0         1
## ENSG00000000419  856.072924     0.52185225 0.05863385         0         1
## ENSG00000000457  334.052490     0.03406567 0.08978050         0         1
## ENSG00000000460  706.150361     0.03311635 0.06495760         0         1
## ENSG00000000971    9.708977    -0.37529870 0.50982271         0         1
## ENSG00000001036 1407.097579     0.36360258 0.04733630         0         1
##                      padj
##                 <numeric>
## ENSG00000000003         1
## ENSG00000000419         1
## ENSG00000000457         1
## ENSG00000000460         1
## ENSG00000000971         1
## ENSG00000001036         1
color_genes <- c("#000000", "#FF0000", "#0432FF", "#FF40FF", "#548235", "#FF9300", "#942093", "#00FDFF")

# color CD34 red
ruv_res_con_CD34_pc_com$color <- 1
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000174059"] <- 2

# make non-DE transparent
ruv_res_con_CD34_pc_com$trans <- 0.1
ruv_res_con_CD34_pc_com$trans[which(ruv_res_con_CD34_pc_com$padj < 0.05)] <- 1 

# make CD34 larger
ruv_res_con_CD34_pc_com$size <- 1
ruv_res_con_CD34_pc_com$size[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000174059"] <- 1.5

ruv_res_con_CD34_pc_com_df <- as.data.frame(ruv_res_con_CD34_pc_com)
head(ruv_res_con_CD34_pc_com_df)
##                    baseMean log2FoldChange      lfcSE stat pvalue padj
## ENSG00000000003 2063.928940    -0.13312402 0.04538688    0      1    1
## ENSG00000000419  856.072924     0.52185225 0.05863385    0      1    1
## ENSG00000000457  334.052490     0.03406567 0.08978050    0      1    1
## ENSG00000000460  706.150361     0.03311635 0.06495760    0      1    1
## ENSG00000000971    9.708977    -0.37529870 0.50982271    0      1    1
## ENSG00000001036 1407.097579     0.36360258 0.04733630    0      1    1
##                 color trans size
## ENSG00000000003     1   0.1    1
## ENSG00000000419     1   0.1    1
## ENSG00000000457     1   0.1    1
## ENSG00000000460     1   0.1    1
## ENSG00000000971     1   0.1    1
## ENSG00000001036     1   0.1    1
str(ruv_res_con_CD34_pc_com_df)
## 'data.frame':    14260 obs. of  9 variables:
##  $ baseMean      : num  2063.93 856.07 334.05 706.15 9.71 ...
##  $ log2FoldChange: num  -0.1331 0.5219 0.0341 0.0331 -0.3753 ...
##  $ lfcSE         : num  0.0454 0.0586 0.0898 0.065 0.5098 ...
##  $ stat          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pvalue        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ padj          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ color         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ trans         : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ size          : num  1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_CD34_pc_com_df$color <- factor(ruv_res_con_CD34_pc_com_df$color)
MAplot_Ctrl_v_CD34 <- ggplot(data = ruv_res_con_CD34_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=c("black", "red"), guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("Control vs. CD34") +
  theme_tufte()
MAplot_Ctrl_v_CD34

pdf("MAplot_Ctrl_v_CD34.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CD34
dev.off()
## quartz_off_screen 
##                 2

Now for the Control vs. CRISPR plot

# color CD34 red
ruv_res_con_crisp_pc_com$color <- 1
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 2

# make non-DE transparent
ruv_res_con_crisp_pc_com$trans <- 0.1
ruv_res_con_crisp_pc_com$trans[which(ruv_res_con_crisp_pc_com$padj < 0.05)] <- 1 

# make CD34 larger
ruv_res_con_crisp_pc_com$size <- 1
ruv_res_con_crisp_pc_com$size[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 1.5

ruv_res_con_crisp_pc_com_df <- as.data.frame(ruv_res_con_crisp_pc_com)
head(ruv_res_con_crisp_pc_com_df)
##                    baseMean log2FoldChange      lfcSE stat pvalue padj
## ENSG00000000003 2063.928940   -0.258230887 0.04483725    0      1    1
## ENSG00000000419  856.072924    0.498139331 0.05758999    0      1    1
## ENSG00000000457  334.052490   -0.009779321 0.08815098    0      1    1
## ENSG00000000460  706.150361   -0.011057270 0.06383935    0      1    1
## ENSG00000000971    9.708977    0.062959951 0.48445166    0      1    1
## ENSG00000001036 1407.097579    0.209524611 0.04680729    0      1    1
##                 color trans size
## ENSG00000000003     1   0.1    1
## ENSG00000000419     1   0.1    1
## ENSG00000000457     1   0.1    1
## ENSG00000000460     1   0.1    1
## ENSG00000000971     1   0.1    1
## ENSG00000001036     1   0.1    1
str(ruv_res_con_crisp_pc_com_df)
## 'data.frame':    14260 obs. of  9 variables:
##  $ baseMean      : num  2063.93 856.07 334.05 706.15 9.71 ...
##  $ log2FoldChange: num  -0.25823 0.49814 -0.00978 -0.01106 0.06296 ...
##  $ lfcSE         : num  0.0448 0.0576 0.0882 0.0638 0.4845 ...
##  $ stat          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pvalue        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ padj          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ color         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ trans         : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ size          : num  1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_crisp_pc_com_df$color <- factor(ruv_res_con_crisp_pc_com_df$color)

MAplot_Ctrl_v_CRISPR <- ggplot(data = ruv_res_con_crisp_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=c("black", "red"), guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("Control vs. CRISPR") +
  theme_tufte()
MAplot_Ctrl_v_CRISPR

pdf("MAplot_Ctrl_v_CRISPR.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CRISPR
dev.off()
## quartz_off_screen 
##                 2
# remove CD34 highlight
ruv_res_con_crisp_pc_com$size[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 1
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 1
ruv_res_con_crisp_pc_com_df <- as.data.frame(ruv_res_con_crisp_pc_com)
MAplot_Ctrl_v_CRISPR2 <- ggplot(data = ruv_res_con_crisp_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = as.factor(color), alpha = trans)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_color_manual(values=c("black", "red"), guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("Control vs. CRISPR") +
  theme_tufte()
MAplot_Ctrl_v_CRISPR2

pdf("MAplot_Ctrl_v_CRISPR2.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CRISPR2
dev.off()
## quartz_off_screen 
##                 2

CRISPR vs. CD34 plot

# color CD34 red
ruv_res_crisp_CD34_pc_com$color <- 1
ruv_res_crisp_CD34_pc_com$color[rownames(ruv_res_crisp_CD34_pc_com)=="ENSG00000174059"] <- 2

# make non-DE transparent
ruv_res_crisp_CD34_pc_com$trans <- 0.1
ruv_res_crisp_CD34_pc_com$trans[which(ruv_res_crisp_CD34_pc_com$padj < 0.05)] <- 1 

# make CD34 larger
ruv_res_crisp_CD34_pc_com$size <- 1
ruv_res_crisp_CD34_pc_com$size[rownames(ruv_res_crisp_CD34_pc_com)=="ENSG00000174059"] <- 1.5

ruv_res_crisp_CD34_pc_com_df <- as.data.frame(ruv_res_crisp_CD34_pc_com)
head(ruv_res_crisp_CD34_pc_com_df)
##                    baseMean log2FoldChange      lfcSE stat pvalue padj
## ENSG00000000003 2063.928940     0.12510687 0.04655009    0      1    1
## ENSG00000000419  856.072924     0.02371291 0.05771032    0      1    1
## ENSG00000000457  334.052490     0.04384499 0.09139576    0      1    1
## ENSG00000000460  706.150361     0.04417362 0.06611236    0      1    1
## ENSG00000000971    9.708977    -0.43825865 0.51390673    0      1    1
## ENSG00000001036 1407.097579     0.15407797 0.04753871    0      1    1
##                 color trans size
## ENSG00000000003     1   0.1    1
## ENSG00000000419     1   0.1    1
## ENSG00000000457     1   0.1    1
## ENSG00000000460     1   0.1    1
## ENSG00000000971     1   0.1    1
## ENSG00000001036     1   0.1    1
str(ruv_res_crisp_CD34_pc_com_df)
## 'data.frame':    14260 obs. of  9 variables:
##  $ baseMean      : num  2063.93 856.07 334.05 706.15 9.71 ...
##  $ log2FoldChange: num  0.1251 0.0237 0.0438 0.0442 -0.4383 ...
##  $ lfcSE         : num  0.0466 0.0577 0.0914 0.0661 0.5139 ...
##  $ stat          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pvalue        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ padj          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ color         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ trans         : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ size          : num  1 1 1 1 1 1 1 1 1 1 ...
ruv_res_crisp_CD34_pc_com_df$color <- factor(ruv_res_crisp_CD34_pc_com_df$color)
MAplot_CRSIPR_v_CD34 <- ggplot(data = ruv_res_crisp_CD34_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=c("black", "red"), guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("CRISPR vs. CD34") +
  theme_tufte()
MAplot_CRSIPR_v_CD34

pdf("MAplot_CRSIPR_v_CD34.pdf", width=6, height = 4, family="ArialMT")
MAplot_CRSIPR_v_CD34
dev.off()
## quartz_off_screen 
##                 2

Now to MAplots highlighting the six genes that claudia is testing.

# color genes
ruv_res_con_crisp_pc_com$color <- 1
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000174059"] <- 2
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000104856"] <- 3
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000077150"] <- 4
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000175197"] <- 5
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000171223"] <- 6
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000175592"] <- 7
ruv_res_con_crisp_pc_com$color[rownames(ruv_res_con_crisp_pc_com)=="ENSG00000101255"] <- 8

# make non-DE transparent
ruv_res_con_crisp_pc_com$trans <- 0.1
ruv_res_con_crisp_pc_com$trans[which(ruv_res_con_crisp_pc_com$padj < 0.05)] <- 1 

# make CD34/genes larger
ruv_res_con_crisp_pc_com$size <- 1
GOI <- c("ENSG00000174059", "ENSG00000101255", "ENSG00000077150", "ENSG00000175197",
         "ENSG00000171223", "ENSG00000175592", "ENSG00000104856")

ruv_res_con_crisp_pc_com$size[rownames(ruv_res_con_crisp_pc_com) %in% GOI] <- 1.5

ruv_res_con_crisp_pc_com_df <- as.data.frame(ruv_res_con_crisp_pc_com)
head(ruv_res_con_crisp_pc_com_df)
##                    baseMean log2FoldChange      lfcSE stat pvalue padj
## ENSG00000000003 2063.928940   -0.258230887 0.04483725    0      1    1
## ENSG00000000419  856.072924    0.498139331 0.05758999    0      1    1
## ENSG00000000457  334.052490   -0.009779321 0.08815098    0      1    1
## ENSG00000000460  706.150361   -0.011057270 0.06383935    0      1    1
## ENSG00000000971    9.708977    0.062959951 0.48445166    0      1    1
## ENSG00000001036 1407.097579    0.209524611 0.04680729    0      1    1
##                 color trans size
## ENSG00000000003     1   0.1    1
## ENSG00000000419     1   0.1    1
## ENSG00000000457     1   0.1    1
## ENSG00000000460     1   0.1    1
## ENSG00000000971     1   0.1    1
## ENSG00000001036     1   0.1    1
str(ruv_res_con_crisp_pc_com_df)
## 'data.frame':    14260 obs. of  9 variables:
##  $ baseMean      : num  2063.93 856.07 334.05 706.15 9.71 ...
##  $ log2FoldChange: num  -0.25823 0.49814 -0.00978 -0.01106 0.06296 ...
##  $ lfcSE         : num  0.0448 0.0576 0.0882 0.0638 0.4845 ...
##  $ stat          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pvalue        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ padj          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ color         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ trans         : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ size          : num  1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_crisp_pc_com_df$color <- factor(ruv_res_con_crisp_pc_com_df$color)
levels(ruv_res_con_crisp_pc_com_df$color)
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
MAplot_Ctrl_v_CRISPR_qPCR <- ggplot(data = ruv_res_con_crisp_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=color_genes, guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("Control vs. CRISPR") +
  theme_tufte()
MAplot_Ctrl_v_CRISPR_qPCR

pdf("MAplot_Ctrl_v_CRISPR_qPCR.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CRISPR_qPCR
dev.off()
## quartz_off_screen 
##                 2

Now to MAplots highlighting the six genes that claudia is testing.

# color genes
ruv_res_con_CD34_pc_com$color <- 1
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000174059"] <- 2
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000104856"] <- 3
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000077150"] <- 4
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000175197"] <- 5
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000171223"] <- 6
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000175592"] <- 7
ruv_res_con_CD34_pc_com$color[rownames(ruv_res_con_CD34_pc_com)=="ENSG00000101255"] <- 8

# make non-DE transparent
ruv_res_con_CD34_pc_com$trans <- 0.1
ruv_res_con_CD34_pc_com$trans[which(ruv_res_con_CD34_pc_com$padj < 0.05)] <- 1 

# make CD34/genes larger
ruv_res_con_CD34_pc_com$size <- 1
GOI <- c("ENSG00000174059", "ENSG00000101255", "ENSG00000077150", "ENSG00000175197",
         "ENSG00000171223", "ENSG00000175592", "ENSG00000104856")

ruv_res_con_CD34_pc_com$size[rownames(ruv_res_con_CD34_pc_com) %in% GOI] <- 1.5

ruv_res_con_CD34_pc_com_df <- as.data.frame(ruv_res_con_CD34_pc_com)
head(ruv_res_con_CD34_pc_com_df)
##                    baseMean log2FoldChange      lfcSE stat pvalue padj
## ENSG00000000003 2063.928940    -0.13312402 0.04538688    0      1    1
## ENSG00000000419  856.072924     0.52185225 0.05863385    0      1    1
## ENSG00000000457  334.052490     0.03406567 0.08978050    0      1    1
## ENSG00000000460  706.150361     0.03311635 0.06495760    0      1    1
## ENSG00000000971    9.708977    -0.37529870 0.50982271    0      1    1
## ENSG00000001036 1407.097579     0.36360258 0.04733630    0      1    1
##                 color trans size
## ENSG00000000003     1   0.1    1
## ENSG00000000419     1   0.1    1
## ENSG00000000457     1   0.1    1
## ENSG00000000460     1   0.1    1
## ENSG00000000971     1   0.1    1
## ENSG00000001036     1   0.1    1
str(ruv_res_con_CD34_pc_com_df)
## 'data.frame':    14260 obs. of  9 variables:
##  $ baseMean      : num  2063.93 856.07 334.05 706.15 9.71 ...
##  $ log2FoldChange: num  -0.1331 0.5219 0.0341 0.0331 -0.3753 ...
##  $ lfcSE         : num  0.0454 0.0586 0.0898 0.065 0.5098 ...
##  $ stat          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ pvalue        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ padj          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ color         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ trans         : num  0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
##  $ size          : num  1 1 1 1 1 1 1 1 1 1 ...
ruv_res_con_CD34_pc_com_df$color <- factor(ruv_res_con_CD34_pc_com_df$color)
levels(ruv_res_con_CD34_pc_com_df$color)
## [1] "1" "2" "3" "4" "5" "6" "7" "8"
MAplot_Ctrl_v_CD34_qPCR <- ggplot(data = ruv_res_con_CD34_pc_com_df, aes(x = baseMean, y = log2FoldChange, color = color, alpha = trans, size=size)) +
  geom_point() +
  scale_x_continuous(trans = 'log10') +
  scale_alpha_continuous(guide=FALSE) +
  scale_size_continuous(guide=FALSE, range = c(1,3)) +
  scale_color_manual(values=color_genes, guide=FALSE) +
  ylim(c(-7, 7)) +
  geom_hline(yintercept=c(-1,1), linetype="dotted") +
  xlab("mean expression") +
  ylab("log fold change") +
  ggtitle("Control vs. CD34") +
  theme_tufte()
MAplot_Ctrl_v_CD34_qPCR

pdf("MAplot_Ctrl_v_CD34_qPCR.pdf", width=6, height = 4, family="ArialMT")
MAplot_Ctrl_v_CD34_qPCR
dev.off()
## quartz_off_screen 
##                 2

Outputting the Session Info

sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      stats4    parallel  stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] VennDiagram_1.6.18         futile.logger_1.4.3       
##  [3] ggthemes_3.4.0             ggplot2_2.2.1             
##  [5] RFmarkerDetector_1.0.1     AUCRF_1.1                 
##  [7] randomForest_4.6-12        UpSetR_1.3.3              
##  [9] pheatmap_1.0.8             RColorBrewer_1.1-2        
## [11] genefilter_1.58.1          qvalue_2.8.0              
## [13] RUVSeq_1.10.0              edgeR_3.18.1              
## [15] limma_3.32.10              EDASeq_2.10.0             
## [17] ShortRead_1.34.2           GenomicAlignments_1.12.2  
## [19] Rsamtools_1.28.0           Biostrings_2.44.2         
## [21] XVector_0.16.0             BiocParallel_1.10.1       
## [23] DESeq2_1.16.1              SummarizedExperiment_1.6.5
## [25] DelayedArray_0.2.7         matrixStats_0.52.2        
## [27] Biobase_2.36.2             GenomicRanges_1.28.6      
## [29] GenomeInfoDb_1.12.3        IRanges_2.10.5            
## [31] S4Vectors_0.14.7           BiocGenerics_0.22.1       
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.3-2        hwriter_1.3.2          
##  [3] rprojroot_1.2           htmlTable_1.11.0       
##  [5] base64enc_0.1-3         rstudioapi_0.7         
##  [7] bit64_0.9-7             AnnotationDbi_1.38.2   
##  [9] codetools_0.2-15        splines_3.4.1          
## [11] R.methodsS3_1.7.1       DESeq_1.28.0           
## [13] WilcoxCV_1.0-2          geneplotter_1.54.0     
## [15] knitr_1.17              Formula_1.2-2          
## [17] annotate_1.54.0         cluster_2.0.6          
## [19] R.oo_1.21.0             compiler_3.4.1         
## [21] httr_1.3.1              backports_1.1.1        
## [23] assertthat_0.2.0        Matrix_1.2-12          
## [25] lazyeval_0.2.1          acepack_1.4.1          
## [27] htmltools_0.3.6         prettyunits_1.0.2      
## [29] tools_3.4.1             bindrcpp_0.2           
## [31] gtable_0.2.0            glue_1.2.0             
## [33] GenomeInfoDbData_0.99.0 reshape2_1.4.2         
## [35] dplyr_0.7.4             Rcpp_0.12.14           
## [37] gdata_2.18.0            UsingR_2.0-5           
## [39] rtracklayer_1.36.6      stringr_1.2.0          
## [41] gtools_3.5.0            XML_3.98-1.9           
## [43] HistData_0.8-2          zlibbioc_1.22.0        
## [45] MASS_7.3-47             scales_0.5.0.9000      
## [47] aroma.light_3.6.0       lambda.r_1.2           
## [49] yaml_2.1.15             memoise_1.1.0          
## [51] gridExtra_2.3           biomaRt_2.35.8         
## [53] rpart_4.1-11            latticeExtra_0.6-28    
## [55] stringi_1.1.6           RSQLite_2.0            
## [57] checkmate_1.8.5         GenomicFeatures_1.28.5 
## [59] caTools_1.17.1          rlang_0.1.4            
## [61] pkgconfig_2.0.1         bitops_1.0-6           
## [63] evaluate_0.10.1         lattice_0.20-35        
## [65] ROCR_1.0-7              purrr_0.2.4            
## [67] bindr_0.1               labeling_0.3           
## [69] htmlwidgets_0.9         bit_1.1-12             
## [71] plyr_1.8.4              magrittr_1.5           
## [73] R6_2.2.2                gplots_3.0.1           
## [75] Hmisc_4.0-3             DBI_0.7                
## [77] foreign_0.8-69          survival_2.41-3        
## [79] RCurl_1.95-4.10         nnet_7.3-12            
## [81] tibble_1.3.4            futile.options_1.0.0   
## [83] KernSmooth_2.23-15      rmarkdown_1.8          
## [85] progress_1.1.2          locfit_1.5-9.1         
## [87] data.table_1.10.4-3     blob_1.1.0             
## [89] digest_0.6.12           xtable_1.8-2           
## [91] tidyr_0.7.2             R.utils_2.6.0          
## [93] munsell_0.4.3